In [1]:
#import eveerything that needed
import re
import pandas as pd
import nltk
from nltk.stem import PorterStemmer as ps
from nltk.corpus import stopwords
import datetime
import string
import matplotlib.pyplot as plt
import numpy as np
import plotly
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go 
plotly.tools.set_credentials_file(username='FangzhengZhang', api_key='JCzdvumGOr8rCKHaeeeK')
In [2]:
#IO data 
table = pd.read_csv('employee_reviews.csv')
table
Out[2]:
Unnamed: 0 company location dates job-title summary pros cons advice-to-mgmt overall-ratings work-balance-stars culture-values-stars carrer-opportunities-stars comp-benefit-stars senior-mangemnet-stars helpful-count link
0 1 google none Dec 11, 2018 Current Employee - Anonymous Employee Best Company to work for People are smart and friendly Bureaucracy is slowing things down none 5.0 4.0 5.0 5.0 4.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
1 2 google Mountain View, CA Jun 21, 2013 Former Employee - Program Manager Moving at the speed of light, burn out is inev... 1) Food, food, food. 15+ cafes on main campus ... 1) Work/life balance. What balance? All those ... 1) Don't dismiss emotional intelligence and ad... 4.0 2.0 3.0 3.0 5.0 3.0 2094 https://www.glassdoor.com/Reviews/Google-Revie...
2 3 google New York, NY May 10, 2014 Current Employee - Software Engineer III Great balance between big-company security and... * If you're a software engineer, you're among ... * It *is* becoming larger, and with it comes g... Keep the focus on the user. Everything else wi... 5.0 5.0 4.0 5.0 5.0 4.0 949 https://www.glassdoor.com/Reviews/Google-Revie...
3 4 google Mountain View, CA Feb 8, 2015 Current Employee - Anonymous Employee The best place I've worked and also the most d... You can't find a more well-regarded company th... I live in SF so the commute can take between 1... Keep on NOT micromanaging - that is a huge ben... 5.0 2.0 5.0 5.0 4.0 5.0 498 https://www.glassdoor.com/Reviews/Google-Revie...
4 5 google Los Angeles, CA Jul 19, 2018 Former Employee - Software Engineer Unique, one of a kind dream job Google is a world of its own. At every other c... If you don't work in MTV (HQ), you will be giv... Promote managers into management for their man... 5.0 5.0 5.0 5.0 5.0 5.0 49 https://www.glassdoor.com/Reviews/Google-Revie...
5 6 google Mountain View, CA Dec 9, 2018 Former Employee - SDE2 NICE working in GOOGLE as an INTERN People are not that busy, so they are nice to ... Food is not good as I expected. People said it... none 5.0 4.0 4.0 4.0 5.0 4.0 1 https://www.glassdoor.com/Reviews/Google-Revie...
6 7 google New York, NY Dec 11, 2018 Current Employee - Software Engineer Software engineer Great working environment. Good work life balance Usual big company problems. Hierarchy. none 5.0 5.0 4.0 4.0 5.0 4.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
7 8 google none Dec 11, 2018 Former Employee - Anonymous Employee great place to work and progress work culture, benefits, growth, people, No cons that i can think of none 5.0 5.0 5.0 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
8 9 google New York, NY Dec 10, 2018 Current Employee - Anonymous Employee Google Surpasses Realistic Expectations Great products. Vision you can feel good about... Younger employees complaining about the compan... none 5.0 5.0 5.0 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
9 10 google none Dec 9, 2018 Current Employee - Anonymous Employee Execellent for engineers Impact driven. Best tech in the world. Size matters. Engineers are a bit disconnected... none 5.0 5.0 5.0 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
10 11 google Mountain View, CA Dec 8, 2018 Current Employee - Anonymous Employee Nice place to work * Smart people * World class infrastructure * ... Giant company with uneven project and team cha... none 5.0 4.0 5.0 4.0 4.0 3.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
11 12 google none Dec 8, 2018 Current Employee - Anonymous Employee Software Engineer * There is endless opportunity * You'll never ... * There's always someone better than you. * Yo... none 5.0 5.0 5.0 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
12 13 google none Dec 4, 2018 Former Employee - Anonymous Employee Best Company to Work For! Atmosphere that promotes the expression of fre... It can be an overwhelming machine at times but... none 5.0 5.0 5.0 5.0 5.0 5.0 1 https://www.glassdoor.com/Reviews/Google-Revie...
13 14 google San Francisco, CA Dec 3, 2018 Current Employee - Sales Operations Analyst Still the best place to work! Amazing company. Still the best place to work. Size, which limits opportunities for extraordi... none 5.0 5.0 5.0 5.0 4.0 4.0 1 https://www.glassdoor.com/Reviews/Google-Revie...
14 15 google none Dec 6, 2018 Current Employee - Anonymous Employee Mba Intern Perks, autonomy, cool products, smart people Slow career progression for non-tech none 5.0 4.0 5.0 3.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
15 16 google none Dec 6, 2018 Current Employee - Anonymous Employee Review The people are great to work with There is lower compensation than expected none 5.0 none none none none none 0 https://www.glassdoor.com/Reviews/Google-Revie...
16 17 google none Nov 29, 2018 Current Employee - Anonymous Employee Review of Google The people are great to work with, good perks. Wide variety in experience based on team. none 5.0 4.0 5.0 5.0 5.0 4.0 2 https://www.glassdoor.com/Reviews/Google-Revie...
17 18 google none Dec 2, 2018 Former Employee - Anonymous Employee Analysts Good managers, benefits , some support, nice a... Long hours, far locations, a lot of work none 5.0 none none 2.0 none none 1 https://www.glassdoor.com/Reviews/Google-Revie...
18 19 google none Dec 1, 2018 Former Employee - Anonymous Employee Great Company With No Down Sides Really fun work environment with startup Really no down sides that I saw. none 5.0 5.0 5.0 5.0 5.0 5.0 1 https://www.glassdoor.com/Reviews/Google-Revie...
19 20 google none Dec 3, 2018 Current Employee - Anonymous Employee Senior Account Manager Great benefits, growth and people to work with... Must be located in an office none 5.0 5.0 5.0 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
20 21 google New York, NY Nov 26, 2018 Current Employee - Designer Great, but great can always be better Great team and support for individuals Lots of bureaucracy, not super diverse none 5.0 3.0 4.0 3.0 4.0 5.0 2 https://www.glassdoor.com/Reviews/Google-Revie...
21 22 google none Dec 2, 2018 Current Employee - Anonymous Employee Great company to work for I love Google a lot Some things are not to great none 5.0 5.0 5.0 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
22 23 google none Dec 1, 2018 Current Employee - Anonymous Employee NaN it's a great place to work! too much red tape on doing even the smallest p... none 5.0 none none 5.0 none none 0 https://www.glassdoor.com/Reviews/Google-Revie...
23 24 google none Nov 30, 2018 Current Employee - Anonymous Employee Software Engineer Awesome job environment to work in Pressure is there for some parts none 5.0 none none none none none 0 https://www.glassdoor.com/Reviews/Google-Revie...
24 25 google Los Angeles, CA Nov 29, 2018 Current Employee - Staff Accountant Accountant I like and learned from the beginning accounting High clerical work in the Sane record Management and Finance must be close relation ... 5.0 5.0 5.0 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
25 26 google none Nov 29, 2018 Current Employee - Anonymous Employee Good Company to Work for - perks are phenomenal, food, health, office - work life balance is hard to manage none 5.0 4.0 5.0 4.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
26 27 google none Nov 28, 2018 Current Employee - Anonymous Employee Great culture Very inspiring engineers to work with. Easy to get lost, as it is such a big company. none 5.0 3.0 5.0 4.0 5.0 3.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
27 28 google San Francisco, CA Nov 17, 2018 Current Employee - Program Manager I Not perfect, but still the best place in the w... - A company culture that encourages dissent, d... - A company with over 90k people at the time o... - Just because a small percentage of the compa... 5.0 5.0 5.0 5.0 5.0 5.0 3 https://www.glassdoor.com/Reviews/Google-Revie...
28 29 google none Nov 27, 2018 Current Employee - Anonymous Employee Great place to work The people are awesome. Lots of perks. Reduce ... There's sometimes too wide a variety of things... none 5.0 4.0 5.0 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
29 30 google none Nov 20, 2018 Former Employee - Anonymous Employee Great Great place to work, free food, and great cult... Big company, prepare to move much slower than ... none 5.0 none none none none none 2 https://www.glassdoor.com/Reviews/Google-Revie...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
67499 67500 microsoft none Jan 13, 2011 Current Employee - Program Manager Good experience Passion for work and company Lot of opportunities. Unable to juggle for per... Not much. Management doesn't need advice. 4.0 4.0 none 4.0 4.5 4.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67500 67501 microsoft none Jan 11, 2011 Current Employee - Architect Started kind of shaky, but things are getting ... Great benefits. no other place can compare. As... Sometimes it feels too big! too many organizat... Be fair when it comes to treating with all you... 3.0 4.0 none 3.0 4.0 2.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67501 67502 microsoft Hyderabad (India) Dec 26, 2010 Former Employee - Operations Analyst I would definitely join the company if I grab ... One word - technologies. We are rapidly grabbi... It takes times to find your way but once you d... Every manager is unique. Unless you really kno... 5.0 5.0 none 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67502 67503 microsoft none Jan 9, 2011 Current Employee - Anonymous Employee Good software company Challenge, professional career growth, high st... Keen competition. Hard working, stressful. none 4.0 4.0 none 3.0 3.5 3.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67503 67504 microsoft none Jan 7, 2011 Current Employee - Anonymous Employee was once a great place to work still some pockets of good ppl. was great now filled w/ lots of ppl who outsou... change out CEO, change approach to review 2.0 2.5 none 1.0 4.0 2.0 2 https://www.glassdoor.com/Reviews/Microsoft-Re...
67504 67505 microsoft Hyderabad (India) Jan 6, 2011 Current Employee - Anonymous Employee Great experience and exposure An employee's choice of company, great HR prac... depending on division, career advancement may ... none 4.0 5.0 none 3.0 5.0 3.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67505 67506 microsoft none Jan 6, 2011 Current Employee - Anonymous Employee Great experience - but much pressure One of the best places to work - profesionally... very pressing and therefore not enough time fo... Management is very professional but it would b... 4.0 2.5 none 3.5 4.5 4.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67506 67507 microsoft none Jan 4, 2011 Former Employee - Account Executive It was always difficult, whether it was a mult... There are some really smart folks working ther... Meetings, meetings and more meetings. Secret a... Do away with annual reviews. Make your manager... 2.0 5.0 none 2.0 4.0 4.0 2 https://www.glassdoor.com/Reviews/Microsoft-Re...
67507 67508 microsoft Hyderabad (India) Jan 3, 2011 Current Employee - Technical Support Engineer Great company 1. Good salary 2. Good benefits 3. Overall ver... It varies from team to team - If you ar estuck... Nothing much - just keep doing whatever you ar... 3.0 3.5 none 3.5 5.0 1.0 1 https://www.glassdoor.com/Reviews/Microsoft-Re...
67508 67509 microsoft none Jan 4, 2011 Former Employee - Technical Specialist Interesting.... :-) Knowledge Sharing Dynamic Teams Great career "... - Grew too fast - Windows & , Windows & Office... - Direct & Manage the talent, not confuse with... 4.0 3.5 none 3.0 4.0 3.5 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67509 67510 microsoft none Jan 4, 2011 Former Employee - Technical Specialist Interesting.... :-) Knowledge Sharing Dynamic Teams Great career "... - Grew too fast - Windows & , Windows & Office... - Direct & Manage the talent, not confuse with... 4.0 3.5 none 3.0 4.0 3.5 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67510 67511 microsoft Hyderabad (India) Jan 4, 2011 Current Employee - Software Development Engine... Work life balance is great, Compensation is no... Best company for entry level Engineer. Good Wo... If you joined on a wrong level, No matter how ... Promotions need to be more transparent, 3.0 4.5 none 2.5 1.0 4.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67511 67512 microsoft Bengaluru (India) Jan 1, 2011 Current Employee - Support Escalation Engineer GTSC highly technical and motivated people one can ... Hire on perception known devil is better than ... respect the skill set and not the perceptions 3.0 3.0 none 3.5 2.0 2.0 1 https://www.glassdoor.com/Reviews/Microsoft-Re...
67512 67513 microsoft none Jan 2, 2011 Former Employee - Anonymous Employee Overall good, however, it depends on group Has great benefit package. Most of the time yo... Takes long time to make a decision. Difficult ... none 4.0 3.5 none 3.5 4.5 3.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67513 67514 microsoft Tokyo (Japan) Jan 2, 2011 Former Employee - Marketing Specialist Good career path You can learn and experience lots of things at... Difficult to work with some senior management ... none 3.0 4.0 none 4.0 5.0 4.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67514 67515 microsoft none Jan 3, 2011 Current Employee - Anonymous Employee A stable company but still is not for lazy people - Good benefit so far - Large, lots of variety - Need to follow many rules/processes Hold their MS stocks longer 4.0 4.0 none 4.0 3.5 4.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67515 67516 microsoft none Dec 29, 2010 Current Employee - Operations Manager A: Who do you work for? Me: Microsoft A: I gue... - good benefits and perks, free Windows Phone ... - your life quality, promotions, recognitions ... you have more people in the team, not only the... 3.0 4.5 none 2.0 2.5 3.0 1 https://www.glassdoor.com/Reviews/Microsoft-Re...
67516 67517 microsoft none Dec 31, 2010 Current Employee - Software Development Engine... Mixed - depends on the group I had one group that was pretty good with orga... Another group was terrible - there was no coor... Try for greater consistency between the teams.... 3.0 2.5 none 3.0 4.5 2.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67517 67518 microsoft none Dec 30, 2010 Current Employee - Anonymous Employee if you can work 10-12 hours everyday on regula... Some really smart people to work with. If you ... For working mothers, hard place to grow and ke... none 3.0 3.0 none 2.5 3.0 2.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67518 67519 microsoft none Dec 29, 2010 Former Employee - Anonymous Employee Not the cult it used to be - far more a job now lots of new stuff almost daily processes are killing it and losing momentum none 3.0 2.5 none 3.0 4.0 2.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67519 67520 microsoft Hyderabad (India) Dec 13, 2010 Current Employee - Anonymous Employee Having fun at Microsoft Unlimited opportunities, fun, great networking... Long hours (but then most companies today requ... none 5.0 5.0 none 4.5 4.5 4.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67520 67521 microsoft none Dec 20, 2010 Current Employee - Anonymous Employee a good learning place It is a big company. So if you really want to ... Like any other big companies, it is sometimes ... none 4.0 3.0 none 2.5 3.0 4.5 1 https://www.glassdoor.com/Reviews/Microsoft-Re...
67521 67522 microsoft none Dec 22, 2010 Former Employee - Senior Project Managet Microsoft Services is bad. MS is a good company but hire wrong people. Wo... Senior management has no leadership and very o... Major management revamp esp in APAC. It's not ... 2.0 3.0 none 1.5 2.0 1.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67522 67523 microsoft none Dec 18, 2010 Current Employee - Technical Sales Manager I started off great some years ago but lately ... Employee empowerment Ability to take your init... Company too slow Bureaucratic Lacking consumer... none 3.0 3.0 none 4.5 2.5 3.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67523 67524 microsoft none Dec 17, 2010 Former Employee - Senior Program Manager Good company. Good benefits package, huge customer base. Politics, very slow, lots of dead wood. Move on. 4.0 3.5 none 3.5 4.0 4.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67524 67525 microsoft none Dec 16, 2010 Former Employee - Anonymous Employee Enriching experience for a beginner but bad fo... -Access to a wide range of technologies, compl... -Testers(SDET's ) do not get as many opportuni... Make the company leaner and Meaner. (which wou... 3.0 3.0 none 4.0 4.0 2.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67525 67526 microsoft none Dec 16, 2010 Current Employee - Senior Marketing Manager A complex and interesting experience - Once you're at Microsoft you can change role... - Be prepared to be flexible - frequent change... none 3.0 1.5 none 2.5 4.0 2.5 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67526 67527 microsoft none Dec 15, 2010 Current Employee - Account Manager Good Place to Work Nice place to work. Good atmosphere with advan... Management confusion at times with vision for ... none 4.0 3.0 none 4.0 4.5 3.5 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67527 67528 microsoft none Dec 15, 2010 Current Employee - Senior Test Lead It's a competitive work place, with overload w... Smart people around you, can learn from them Politics, weak moral, leaning loyalty none 3.0 2.0 none 3.0 3.5 3.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
67528 67529 microsoft none Dec 14, 2010 Former Employee - Senior Director Used to be great Compensation, Health benefits and brand name r... Leadership was better in the late 90's....its ... Good people keep leaving...you have to ask you... 2.0 2.0 none 2.0 4.0 2.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...

67529 rows × 17 columns

Question 1

In [3]:
#print all the important info
company_set = set(table.company)
company_table_dic = {}
company_name_tuple = ('overall','amazon','google','apple','facebook','microsoft','netflix')
company_color_dic = {company_name_tuple[0]:'#000000',company_name_tuple[1]:'#f44542',
                     company_name_tuple[2]:'#f4f141',company_name_tuple[3]:'#41f45e',
                     company_name_tuple[4]:'#2cd5e8',company_name_tuple[5]:'#3513a5',company_name_tuple[6]:'#a5127b'}
amazon_table = table[table.company == 'amazon']
google_table = table[table.company == 'google']
apple_table = table[table.company == 'apple']
facebook_table = table[table.company == 'facebook']
microsoft_table = table[table.company == 'microsoft']
netflix_table = table[table.company == 'netflix']
company_table_dic[company_name_tuple[0]] = table
company_table_dic[company_name_tuple[1]] = amazon_table
company_table_dic[company_name_tuple[2]] = google_table
company_table_dic[company_name_tuple[3]] = apple_table
company_table_dic[company_name_tuple[4]] = facebook_table
company_table_dic[company_name_tuple[5]] = microsoft_table
company_table_dic[company_name_tuple[6]] = netflix_table
In [4]:
#print each small table

for name in company_name_tuple:
    print (name + " table:")
    print ("table length: " + str(len(company_table_dic[name])))
    display(company_table_dic[name].head())
overall table:
table length: 67529
Unnamed: 0 company location dates job-title summary pros cons advice-to-mgmt overall-ratings work-balance-stars culture-values-stars carrer-opportunities-stars comp-benefit-stars senior-mangemnet-stars helpful-count link
0 1 google none Dec 11, 2018 Current Employee - Anonymous Employee Best Company to work for People are smart and friendly Bureaucracy is slowing things down none 5.0 4.0 5.0 5.0 4.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
1 2 google Mountain View, CA Jun 21, 2013 Former Employee - Program Manager Moving at the speed of light, burn out is inev... 1) Food, food, food. 15+ cafes on main campus ... 1) Work/life balance. What balance? All those ... 1) Don't dismiss emotional intelligence and ad... 4.0 2.0 3.0 3.0 5.0 3.0 2094 https://www.glassdoor.com/Reviews/Google-Revie...
2 3 google New York, NY May 10, 2014 Current Employee - Software Engineer III Great balance between big-company security and... * If you're a software engineer, you're among ... * It *is* becoming larger, and with it comes g... Keep the focus on the user. Everything else wi... 5.0 5.0 4.0 5.0 5.0 4.0 949 https://www.glassdoor.com/Reviews/Google-Revie...
3 4 google Mountain View, CA Feb 8, 2015 Current Employee - Anonymous Employee The best place I've worked and also the most d... You can't find a more well-regarded company th... I live in SF so the commute can take between 1... Keep on NOT micromanaging - that is a huge ben... 5.0 2.0 5.0 5.0 4.0 5.0 498 https://www.glassdoor.com/Reviews/Google-Revie...
4 5 google Los Angeles, CA Jul 19, 2018 Former Employee - Software Engineer Unique, one of a kind dream job Google is a world of its own. At every other c... If you don't work in MTV (HQ), you will be giv... Promote managers into management for their man... 5.0 5.0 5.0 5.0 5.0 5.0 49 https://www.glassdoor.com/Reviews/Google-Revie...
amazon table:
table length: 26430
Unnamed: 0 company location dates job-title summary pros cons advice-to-mgmt overall-ratings work-balance-stars culture-values-stars carrer-opportunities-stars comp-benefit-stars senior-mangemnet-stars helpful-count link
7819 7820 amazon Phoenix, AZ None Current Employee - Software Development Manager An Amazing Place to Work I've been at Amazon for a month now, and I've ... No cons, so far - seriously. Like I said, I'm ... none 5.0 4.0 5.0 5.0 5.0 5.0 580 https://www.glassdoor.com/Reviews/Amazon-Revie...
7820 7821 amazon Sunnyvale, CA Dec 10, 2018 Current Employee - Manager, Software Developme... Software Development Engineering Manager Work hard, have fun, make history. Be proud of... RUS grand each year related to last year's sto... none 5.0 5.0 5.0 5.0 4.0 5.0 0 https://www.glassdoor.com/Reviews/Amazon-Revie...
7821 7822 amazon Seattle, WA Jan 10, 2016 Current Employee - Anonymous Employee You Get What You Put In Really smart people, a lot of opportunity for ... You have to be self motivated. NO ONE will hol... More on-boarding training before new employees... 5.0 4.0 5.0 5.0 5.0 5.0 1759 https://www.glassdoor.com/Reviews/Amazon-Revie...
7822 7823 amazon Seattle, WA Feb 20, 2016 Current Employee - Senior Engineering Manager Exciting Work, Abusive Culture Jeff Bezos and his "S-Team" are brilliant and ... The management process is abusive, and I'm cur... Don't pretend that the recent NY Times article... 3.0 3.0 3.0 3.0 4.0 4.0 1404 https://www.glassdoor.com/Reviews/Amazon-Revie...
7823 7824 amazon Seattle, WA Dec 17, 2013 Current Employee - Software Development Manager Can be amazing for some people, horrible for o... Amazon is doing lot's of cool stuff...but lots... - You're responsible for your own career progr... Stack ranking is a horrible practice since it'... 4.0 3.0 3.0 5.0 5.0 4.0 1275 https://www.glassdoor.com/Reviews/Amazon-Revie...
google table:
table length: 7819
Unnamed: 0 company location dates job-title summary pros cons advice-to-mgmt overall-ratings work-balance-stars culture-values-stars carrer-opportunities-stars comp-benefit-stars senior-mangemnet-stars helpful-count link
0 1 google none Dec 11, 2018 Current Employee - Anonymous Employee Best Company to work for People are smart and friendly Bureaucracy is slowing things down none 5.0 4.0 5.0 5.0 4.0 5.0 0 https://www.glassdoor.com/Reviews/Google-Revie...
1 2 google Mountain View, CA Jun 21, 2013 Former Employee - Program Manager Moving at the speed of light, burn out is inev... 1) Food, food, food. 15+ cafes on main campus ... 1) Work/life balance. What balance? All those ... 1) Don't dismiss emotional intelligence and ad... 4.0 2.0 3.0 3.0 5.0 3.0 2094 https://www.glassdoor.com/Reviews/Google-Revie...
2 3 google New York, NY May 10, 2014 Current Employee - Software Engineer III Great balance between big-company security and... * If you're a software engineer, you're among ... * It *is* becoming larger, and with it comes g... Keep the focus on the user. Everything else wi... 5.0 5.0 4.0 5.0 5.0 4.0 949 https://www.glassdoor.com/Reviews/Google-Revie...
3 4 google Mountain View, CA Feb 8, 2015 Current Employee - Anonymous Employee The best place I've worked and also the most d... You can't find a more well-regarded company th... I live in SF so the commute can take between 1... Keep on NOT micromanaging - that is a huge ben... 5.0 2.0 5.0 5.0 4.0 5.0 498 https://www.glassdoor.com/Reviews/Google-Revie...
4 5 google Los Angeles, CA Jul 19, 2018 Former Employee - Software Engineer Unique, one of a kind dream job Google is a world of its own. At every other c... If you don't work in MTV (HQ), you will be giv... Promote managers into management for their man... 5.0 5.0 5.0 5.0 5.0 5.0 49 https://www.glassdoor.com/Reviews/Google-Revie...
apple table:
table length: 12950
Unnamed: 0 company location dates job-title summary pros cons advice-to-mgmt overall-ratings work-balance-stars culture-values-stars carrer-opportunities-stars comp-benefit-stars senior-mangemnet-stars helpful-count link
36649 36650 apple none Dec 11, 2018 Current Employee - Anonymous Employee GIS & Data Analyst Fun work environment, great managers Sometimes there's a high volume of work, but s... none 5.0 none none none none none 0 https://www.glassdoor.com/Reviews/Apple-Review...
36650 36651 apple none Aug 28, 2014 Current Employee - Anonymous Employee Challenging, Rewarding, but ZERO work/life bal... We work with geniuses - in every department, W... ZERO ZERO ZERO work/life balance. Execs have b... none 4.0 1.0 3.0 3.0 3.0 3.0 822 https://www.glassdoor.com/Reviews/Apple-Review...
36651 36652 apple Tempe, AZ Mar 10, 2017 Current Employee - Specialist A Company as Meticulous as Their Products! Competitive Pay, Great Benefits, Amazing Peopl... None! I had one of the best times ever at this... You guys are awesome. Keep up the good work! 5.0 4.0 5.0 5.0 4.0 5.0 248 https://www.glassdoor.com/Reviews/Apple-Review...
36652 36653 apple Lakewood, CO May 13, 2015 Current Employee - Apple At Home Advisor At-Home-Advisor The position is at home. The company is AMAZIN... The schedule changes every 90 days and you hav... The management consists of a team manager and ... 5.0 3.0 5.0 5.0 4.0 5.0 442 https://www.glassdoor.com/Reviews/Apple-Review...
36653 36654 apple none Dec 8, 2018 Current Employee - Anonymous Employee Good Salary, Hours, Time, Career opportunities They only offer part-time hours none 5.0 3.0 3.0 2.0 5.0 3.0 1 https://www.glassdoor.com/Reviews/Apple-Review...
facebook table:
table length: 1590
Unnamed: 0 company location dates job-title summary pros cons advice-to-mgmt overall-ratings work-balance-stars culture-values-stars carrer-opportunities-stars comp-benefit-stars senior-mangemnet-stars helpful-count link
34249 34250 facebook Menlo Park, CA Dec 11, 2018 Current Employee - Product Operations 2nd Job out of college --> Great Experience Context: I worked in operations - Very entrepr... - Very entrepreneurial culture: There is no fo... none 5.0 4.0 4.0 5.0 5.0 4.0 0 https://www.glassdoor.com/Reviews/Facebook-Rev...
34250 34251 facebook Menlo Park, CA Feb 16, 2017 Current Employee - Anonymous Employee Fast paced company with high expectations, but... - Incredible benefits - Excellent compensation... - If you don't learn to make the work life bal... Keep doing what you're doing, keep an eye on e... 5.0 5.0 5.0 5.0 5.0 5.0 294 https://www.glassdoor.com/Reviews/Facebook-Rev...
34251 34252 facebook Menlo Park, CA Feb 16, 2016 Current Employee - Site Selection Program Manager Pleasantly Surprised I'm a 55 year old working in a company where t... Like any tech company, the work is big and com... Keep doing what you're doing and fight hard to... 5.0 4.0 5.0 4.0 5.0 5.0 363 https://www.glassdoor.com/Reviews/Facebook-Rev...
34252 34253 facebook Menlo Park, CA Dec 3, 2018 Current Employee - Anonymous Employee Don't believe all of the negative press Having worked at Facebook for a number of year... Particularly as of late, Facebook has been sub... none 5.0 4.0 5.0 5.0 5.0 5.0 6 https://www.glassdoor.com/Reviews/Facebook-Rev...
34253 34254 facebook Seattle, WA Dec 5, 2018 Current Employee - Staff Software Engineer Continues to be an amazing place to be Smart people who were chosen to be good to wor... Took a year and a half to learn to play defens... I'd love to see management be half as open wit... 5.0 5.0 5.0 5.0 4.0 5.0 4 https://www.glassdoor.com/Reviews/Facebook-Rev...
microsoft table:
table length: 17930
Unnamed: 0 company location dates job-title summary pros cons advice-to-mgmt overall-ratings work-balance-stars culture-values-stars carrer-opportunities-stars comp-benefit-stars senior-mangemnet-stars helpful-count link
49599 49600 microsoft none Dec 11, 2018 Current Employee - Anonymous Employee Microsoft Culture, role impact, mission driven, collabor... Volume of work is sometimes unmanageable, none 5.0 4.0 5.0 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
49600 49601 microsoft Redmond, WA Jan 28, 2013 Current Employee - Anonymous Employee Thoughts after 10 years.... 1. If you love tech, this is a great place. No... Brand on Your Resume: After many years of losi... I'll type it here - but I don't they are liste... 4.0 4.0 2.0 2.0 4.0 none 1439 https://www.glassdoor.com/Reviews/Microsoft-Re...
49601 49602 microsoft Redmond, WA Dec 9, 2018 Current Employee - Anonymous Employee Technical Account Manager Great company and Great people I see no cons at this time Keep up the great work 5.0 4.0 5.0 5.0 5.0 5.0 1 https://www.glassdoor.com/Reviews/Microsoft-Re...
49602 49603 microsoft Chicago, IL Dec 9, 2018 Current Employee - CSA Great company Benefits, work-life balance, tons of internal ... Can't think of any right now none 5.0 5.0 5.0 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
49603 49604 microsoft none Dec 9, 2018 Current Employee - Anonymous Employee Great Company to work for Smart people, work life balance, growth mindse... Can be hard to transfer internally none 5.0 5.0 5.0 4.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Microsoft-Re...
netflix table:
table length: 810
Unnamed: 0 company location dates job-title summary pros cons advice-to-mgmt overall-ratings work-balance-stars culture-values-stars carrer-opportunities-stars comp-benefit-stars senior-mangemnet-stars helpful-count link
35839 35840 netflix none Oct 22, 2018 Current Employee - Anonymous Employee Executive Assistant Open communication. Top of market pay. Extreme... Very meeting heavy, which isn't a con, but doe... Please continue to do what you've been doing. 5.0 4.0 5.0 5.0 4.0 4.0 2 https://www.glassdoor.com/Reviews/Netflix-Revi...
35840 35841 netflix none Sep 20, 2018 Current Employee - Anonymous Employee How much does a functioning human cost? - Paycheck - So many good people - Such a grea... I have been working for a year at Netflix. I'... Ask yourself those questions 3.0 2.0 1.0 5.0 5.0 4.0 52 https://www.glassdoor.com/Reviews/Netflix-Revi...
35841 35842 netflix none Dec 5, 2018 Former Employee - Anonymous Employee QA Tester Contractor Great personalities, great work culture, free ... None really at all at the moment none 5.0 none none none none none 1 https://www.glassdoor.com/Reviews/Netflix-Revi...
35842 35843 netflix Los Angeles, CA Nov 30, 2018 Former Employee - Video Editor Netflix Editor Excellent Projects with Pros from throughout t... Still setting the feel of the company none 5.0 4.0 3.0 4.0 4.0 3.0 0 https://www.glassdoor.com/Reviews/Netflix-Revi...
35843 35844 netflix none Nov 26, 2018 Former Employee - Anonymous Employee I love it It was great and i loved it Not enough free snack especially chips none 5.0 5.0 5.0 5.0 5.0 5.0 0 https://www.glassdoor.com/Reviews/Netflix-Revi...
In [5]:
#plot the number of reviews for each company 
review_count_dic = {}
review_count_dic['amazon'] = len(amazon_table)
review_count_dic['apple'] = len(apple_table) 
review_count_dic['facebook'] = len(facebook_table)
review_count_dic['google'] = len(google_table)
review_count_dic['microsoft'] = len(microsoft_table)
review_count_dic['netflix'] = len(netflix_table)
In [6]:
#plot the number of review for each company
height = list(review_count_dic.values())
bars = list(review_count_dic.keys())
y_pos = np.arange(len(bars))
plt.figure(figsize=(15,10))
# Create bars
plt.bar(y_pos, height)
 
# Create names on the x-axis
plt.xticks(y_pos, bars)
plt.xticks(fontsize=20)

# Show graphic
plt.show()
In [7]:
date_look_up_dic = {"Jan": 1 ,"Feb": 2 ,"Mar": 3 ,"Apr": 4, "May": 5, "Jun": 6,"Jul": 7,"Aug": 8,"Sep": 9,"Oct": 10,"Nov": 11,"Dec": 12}
def change_date_format(in_str):
    '''Dec 11, 2018  ->  2018 12 11'''
    str_part = re.split(' |, ',in_str)
    r_str = ''+ str_part[3] + '-' + str(date_look_up_dic[str_part[1]]) + '-' + str_part[2]
    try:
        date_time_obj = datetime.datetime.strptime(r_str, '%Y-%m-%d')
    except ValueError:
        date_time_obj = datetime.datetime.strptime("6666-6-6", '%Y-%m-%d')
    date_time_obj = date_time_obj.date()
    return date_time_obj
    
def clear_date_none(temp_table):
    return_table = temp_table[temp_table.dates != 'None']
    #todo change the date format 
    return_table.dates = return_table.dates.map(change_date_format)
    return_table = return_table.sort_values(by='dates', ascending=True)
    return return_table
In [8]:
#clean the tables and sort them by date
amazon_date_clean_table = clear_date_none(amazon_table)
apple_date_clean_table = clear_date_none(apple_table)
facebook_date_clean_table = clear_date_none(facebook_table)
google_date_clean_table = clear_date_none(google_table)
microsoft_date_clean_table = clear_date_none(microsoft_table)
netflix_date_clean_table = clear_date_none(netflix_table)
c:\users\fangz\appdata\local\programs\python\python37\lib\site-packages\pandas\core\generic.py:5096: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [9]:
def count_dates(temp_table):
    temp_time_dic = {}
    for i in range(len(temp_table)):
        row = temp_table.iloc[i]
        if str(row.dates) not in temp_time_dic:
            temp_time_dic[str(row.dates)] = 1
        else:
            temp_time_dic[str(row.dates)]+=1
    return temp_time_dic
In [10]:
#big file, take long time to run!!!!
#count the time line 
amazon_time_line_dic = count_dates(amazon_date_clean_table)
apple_time_line_dic = count_dates(apple_date_clean_table)
facebook_time_line_dic = count_dates(facebook_date_clean_table)
google_time_line_dic = count_dates(google_date_clean_table)
microsoft_time_line_dic = count_dates(microsoft_date_clean_table)
netflix_time_line_dic = count_dates(netflix_date_clean_table)
In [11]:
amazon_date_review_table = pd.DataFrame.from_dict({'Dates':list(amazon_time_line_dic.keys()) , 'Amazon_Reviews': list(amazon_time_line_dic.values())})
apple_date_review_table = pd.DataFrame.from_dict({'Dates':list(apple_time_line_dic.keys()) , 'Apple_Reviews': list(apple_time_line_dic.values())})
facebook_date_review_table = pd.DataFrame.from_dict({'Dates':list(facebook_time_line_dic.keys()) , 'Facebook_Reviews': list(facebook_time_line_dic.values())})
google_date_review_table = pd.DataFrame.from_dict({'Dates':list(google_time_line_dic.keys()) , 'Google_Reviews': list(google_time_line_dic.values())})
microsoft_date_review_table = pd.DataFrame.from_dict({'Dates':list(microsoft_time_line_dic.keys()) , 'Microsoft_Reviews': list(microsoft_time_line_dic.values())})
netflix_date_review_table = pd.DataFrame.from_dict({'Dates':list(netflix_time_line_dic.keys()) , 'Netflix_Reviews': list(netflix_time_line_dic.values())})
In [12]:
total_date_review_table = pd.merge(amazon_date_review_table, apple_date_review_table, how='outer', on=['Dates'])
total_date_review_table = pd.merge(total_date_review_table, facebook_date_review_table, how='outer', on=['Dates'])
total_date_review_table = pd.merge(total_date_review_table, google_date_review_table, how='outer', on=['Dates'])
total_date_review_table = pd.merge(total_date_review_table, microsoft_date_review_table, how='outer', on=['Dates'])
total_date_review_table = pd.merge(total_date_review_table, netflix_date_review_table, how='outer', on=['Dates'])
total_date_review_table = total_date_review_table.sort_values(by=['Dates'])
total_date_review_table = total_date_review_table.iloc[:-1]
total_date_review_table.iloc[1:].plot(kind = 'line',x='Dates', y=['Amazon_Reviews', 'Apple_Reviews','Facebook_Reviews', 'Google_Reviews','Microsoft_Reviews', 'Netflix_Reviews'],
                                      fontsize=20,figsize=(20,10),color=['#f44542','#f4f141','#41f45e','#2cd5e8','#3513a5','#a5127b']).legend(fontsize=20)
Out[12]:
<matplotlib.legend.Legend at 0x1f99717a8d0>
In [13]:
amazon_date_review_table.iloc[:-1].plot(kind = 'line',x='Dates', y='Amazon_Reviews',fontsize=20,figsize=(20,10),color='#f44542').legend(fontsize=20)
apple_date_review_table.iloc[:-1].plot(kind = 'line',x='Dates', y='Apple_Reviews',fontsize=20,figsize=(20,10),color='#f4f141').legend(fontsize=20)
facebook_date_review_table.iloc[:-1].plot(kind = 'line',x='Dates', y='Facebook_Reviews',fontsize=20,figsize=(20,10),color='#41f45e').legend(fontsize=20)
google_date_review_table.iloc[:-1].plot(kind = 'line',x='Dates', y='Google_Reviews',fontsize=20,figsize=(20,10),color='#2cd5e8').legend(fontsize=20)
microsoft_date_review_table.iloc[:-1].plot(kind = 'line',x='Dates', y='Microsoft_Reviews',fontsize=20,figsize=(20,10),color='#3513a5').legend(fontsize=20)
netflix_date_review_table.iloc[:-1].plot(kind = 'line',x='Dates', y='Netflix_Reviews',fontsize=20,figsize=(20,10),color='#a5127b').legend(fontsize=20)
Out[13]:
<matplotlib.legend.Legend at 0x1f991dae7b8>

Question 2

In [ ]:
 

Question 3

In [14]:
company_name_tuple
Out[14]:
('overall', 'amazon', 'google', 'apple', 'facebook', 'microsoft', 'netflix')
In [15]:
pro_str_list_dic = {}
con_str_list_dic = {}
for name in company_name_tuple:
    pro_str_list_dic[name] = list(company_table_dic[name].pros)
    con_str_list_dic[name] = list(company_table_dic[name].cons)
    print("Length of "+ name +" pro: " + str(len(pro_str_list_dic[name])))
    print("Length of "+ name +" con: " + str(len(con_str_list_dic[name])))
Length of overall pro: 67529
Length of overall con: 67529
Length of amazon pro: 26430
Length of amazon con: 26430
Length of google pro: 7819
Length of google con: 7819
Length of apple pro: 12950
Length of apple con: 12950
Length of facebook pro: 1590
Length of facebook con: 1590
Length of microsoft pro: 17930
Length of microsoft con: 17930
Length of netflix pro: 810
Length of netflix con: 810
In [16]:
#it is core function to do big data:

stemmer = ps()


def remove_non_english_words(words):
    clean_words = []
    englis_words = nltk.corpus.words.words('en')
    for word in words:
        if word in englis_words:
            clean_words.append(word)
    return clean_words
    

def parse_as_words(sentence):
    return_list = []
    sentence = sentence.lower()
    sentence = sentence.translate(str.maketrans({key: None for key in string.punctuation}))
    words = nltk.word_tokenize(sentence)
    for word in words:
        if not word.isdigit():
            return_list.append(word)
    return return_list

def parse_as_root_phrase(sentence):
    phrase_list = []
    words = parse_as_words(sentence)
    words = back_to_root(words)
    #words = remove_non_english_words(words)
    tag_list = nltk.pos_tag(words)
    #get the phrases
    for i in range(len(tag_list)):
        if i+3 < len(tag_list):
            if tag_list[i][1] == "JJ" and tag_list[i+1][1] == "NN" and tag_list[i+2][1] == "CC"and tag_list[i+3][1] == "NN":
                phrase_list.append(str(tag_list[i][0]) + ' ' + str(tag_list[i+1][0]) + ' ' + str(tag_list[i+2][0])+ ' ' + str(tag_list[i+3][0]))
        
        if i+2 < len(tag_list):
            # at least 3 words left
            #(tag_list[i][1] == "JJ" and tag_list[i+1][1] == "NN" and tag_list[i+2][1] == "NN") or 
            if (tag_list[i][1] == "NN" and tag_list[i+1][1] == "VBP" and tag_list[i+2][1] == "JJ"):
                phrase_list.append(str(tag_list[i][0]) + ' ' + str(tag_list[i+1][0]) + ' ' + str(tag_list[i+2][0]))
           
        if i+1 < len(tag_list):
            # at least 2 words left
            if tag_list[i][1] == "JJ" and tag_list[i+1][1] == "NN":
                phrase_list.append(str(tag_list[i][0] + ' ' + str(tag_list[i+1][0])))
    
    return phrase_list

def parse_as_phrase(sentence):
    phrase_list = []
    words = parse_as_words(sentence)
    #words = back_to_root(words)
    #words = remove_non_english_words(words)
    tag_list = nltk.pos_tag(words)
    #get the phrases
    for i in range(len(tag_list)):
        if i+3 < len(tag_list):
            if tag_list[i][1] == "JJ" and tag_list[i+1][1] == "NN" and tag_list[i+2][1] == "CC"and tag_list[i+3][1] == "NN":
                phrase_list.append(str(tag_list[i][0]) + ' ' + str(tag_list[i+1][0]) + ' ' + str(tag_list[i+2][0])+ ' ' + str(tag_list[i+3][0]))
        
        if i+2 < len(tag_list):
            # at least 3 words left
            #(tag_list[i][1] == "JJ" and tag_list[i+1][1] == "NN" and tag_list[i+2][1] == "NN") or 
            if (tag_list[i][1] == "NN" and tag_list[i+1][1] == "VBP" and tag_list[i+2][1] == "JJ"):
                phrase_list.append(str(tag_list[i][0]) + ' ' + str(tag_list[i+1][0]) + ' ' + str(tag_list[i+2][0]))
           
        if i+1 < len(tag_list):
            # at least 2 words left
            if tag_list[i][1] == "JJ" and tag_list[i+1][1] == "NN":
                phrase_list.append(str(tag_list[i][0] + ' ' + str(tag_list[i+1][0])))
    
    return phrase_list





def back_to_root(words):
    return list(map(stemmer.stem,words))


def count_words(words):
    temp_word_count_dic = {}
    for word in words:
        if word not in temp_word_count_dic:
            temp_word_count_dic[word] = 1
        else:
            temp_word_count_dic[word] += 1
    return temp_word_count_dic



def parse_data_clean_and_count(sentence_list,isroot,isphrase,isrootphrase):
    words_counter_dic = {}
    if not isphrase:
        list_of_words_list = list(map(parse_as_words, sentence_list))
    elif isrootphrase:
        list_of_words_list = list(map(parse_as_root_phrase, sentence_list))
    else:
        list_of_words_list = list(map(parse_as_phrase, sentence_list))
        
        
    if isroot and not isphrase:
        list_of_ready_to_count_list = list(map(back_to_root, list_of_words_list))
    else:
        list_of_ready_to_count_list = list_of_words_list
        
    counter_dic_list =  list(map(count_words , list_of_ready_to_count_list))
    
    for dic in counter_dic_list:
        for key in dic:
            if key not in words_counter_dic:
                words_counter_dic[key] = dic[key]
            else:
                words_counter_dic[key] += dic[key]
    return words_counter_dic


def remove_stopWords_from_word_dic(input_dic):
    return_dic = {}
    stop_words = set(stopwords.words('english'))
    for key in input_dic:
        if key not in stop_words:
            return_dic[key] = input_dic[key]
    return return_dic

def remove_non_englist_words_from_word_dic(input_dic):
    """
    If the keys (words) in input_dic are not in root form, it may delete wrong words
    such as "Benefits"
    """
    return_dic = {}
    englis_words = nltk.corpus.words.words('en')
    for key in input_dic:
        if key in englis_words:
            return_dic[key] = input_dic[key]
    return return_dic
    
In [17]:
# wrap up functions
def parse_data_as_words_count(sentence_list):
    return parse_data_clean_and_count(sentence_list,False,False,False)
    
def parse_data_as_rootWords_count(sentence_list):
    return parse_data_clean_and_count(sentence_list,True,False,False)
    
def parse_data_as_words_remove_stopWord_count(sentence_list):
    t = parse_data_clean_and_count(sentence_list,False,False,False)
    return remove_stopWords_from_word_dic(t)

def parse_data_as_rootwords_remove_stopWord_count(sentence_list):
    t = parse_data_clean_and_count(sentence_list,True,False,False)
    return remove_stopWords_from_word_dic(t)

def parse_data_as_words_remove_non_englist_words_count(sentence_list):
    t = parse_data_clean_and_count(sentence_list,False,False,False)
    return remove_non_englist_words_from_word_dic(t)

def parse_data_as_rootwords_remove_non_englist_words_count(sentence_list):
    t = parse_data_clean_and_count(sentence_list,True,False,False)
    return remove_non_englist_words_from_word_dic(t)

def parse_data_as_words_remove_sotpWord_non_englist_words_count(sentence_list):
    t = parse_data_clean_and_count(sentence_list,False,False,False)
    t = remove_stopWords_from_word_dic(t)
    return remove_non_englist_words_from_word_dic(t)

def parse_data_as_rootwords_remove_sotpWord_non_englist_words_count(sentence_list):
    t = parse_data_clean_and_count(sentence_list,True,False,False)
    t = remove_stopWords_from_word_dic(t)
    return remove_non_englist_words_from_word_dic(t)

def parse_data_as_root_phrase_count(sentence_list):
    return parse_data_clean_and_count(sentence_list,False,True,True)

def parse_data_as_phrase_count(sentence_list):
    return parse_data_clean_and_count(sentence_list,False,True,False)
In [18]:
#start to count all words
pro_word_count_dic_of_dic = {}
con_word_count_dic_of_dic = {}

for name in company_name_tuple:
    pro_word_count_dic_of_dic[name] = parse_data_as_words_count(pro_str_list_dic[name])
    con_word_count_dic_of_dic[name] = parse_data_as_words_count(con_str_list_dic[name])
In [19]:
#build data fram to result of count all words
pro_word_count_table_dic = {}
con_word_count_table_dic = {}

for name in company_name_tuple:
    pro_word_count_table_dic[name] = pd.DataFrame.from_dict({(name+'_pro_word'):list(pro_word_count_dic_of_dic[name].keys()) , 'counts': list(pro_word_count_dic_of_dic[name].values())})
    con_word_count_table_dic[name] = pd.DataFrame.from_dict({(name+'_con_word'):list(con_word_count_dic_of_dic[name].keys()) , 'counts': list(con_word_count_dic_of_dic[name].values())})
    pro_word_count_table_dic[name] = pro_word_count_table_dic[name].sort_values(by='counts', ascending=False)
    con_word_count_table_dic[name] = con_word_count_table_dic[name].sort_values(by='counts', ascending=False)
    
    pro_word_count_table_dic[name].iloc[0:20].plot(kind = 'bar',x=name+'_pro_word', y='counts',title=name + " pro words cont",figsize=(20,10),fontsize=30, color = company_color_dic[name]).title.set_size(40)
    con_word_count_table_dic[name].iloc[0:20].plot(kind = 'bar',x=name+'_con_word', y='counts',title=name + " con words cont",figsize=(20,10),fontsize=30, color = company_color_dic[name]).title.set_size(40)
    
       
#pro_word_count_table_dic[company_name_tuple[0]].iloc[0:20].plot(kind = 'bar',x=company_name_tuple[0]+'_pro_word', y='counts',title=None,figsize=(20,10),fontsize=20)
In [23]:
#start to count non stop words
pro_word_remove_stopword_count_dic_of_dic = {}
con_word_remove_stopword_count_dic_of_dic = {}

for name in company_name_tuple:
    pro_word_remove_stopword_count_dic_of_dic[name] = parse_data_as_words_remove_stopWord_count(pro_str_list_dic[name])
    con_word_remove_stopword_count_dic_of_dic[name] = parse_data_as_words_remove_stopWord_count(con_str_list_dic[name])
In [24]:
#build data fram to result of count non stop words
pro_non_stop_word_count_table_dic = {}
con_non_stop_word_count_table_dic = {}

for name in company_name_tuple:
    pro_non_stop_word_count_table_dic[name] = pd.DataFrame.from_dict({(name+'_pro_non_stop_word'):list(pro_word_remove_stopword_count_dic_of_dic[name].keys()) , 'counts': list(pro_word_remove_stopword_count_dic_of_dic[name].values())})
    con_non_stop_word_count_table_dic[name] = pd.DataFrame.from_dict({(name+'_con_non_stop_word'):list(con_word_remove_stopword_count_dic_of_dic[name].keys()) , 'counts': list(con_word_remove_stopword_count_dic_of_dic[name].values())})
    pro_non_stop_word_count_table_dic[name] = pro_non_stop_word_count_table_dic[name].sort_values(by='counts', ascending=False)
    con_non_stop_word_count_table_dic[name] = con_non_stop_word_count_table_dic[name].sort_values(by='counts', ascending=False)
    
    pro_non_stop_word_count_table_dic[name].iloc[0:20].plot(kind = 'bar',x=name+'_pro_non_stop_word', y='counts',title=name + " _pro_non_stop_word cont",figsize=(20,10),fontsize=30, color = company_color_dic[name]).title.set_size(40)
    con_non_stop_word_count_table_dic[name].iloc[0:20].plot(kind = 'bar',x=name+'_con_non_stop_word', y='counts',title=name + " _con_non_stop_word cont",figsize=(20,10),fontsize=30, color = company_color_dic[name]).title.set_size(40)
    
In [ ]:
 
In [25]:
#start to count non stop root words
pro_rootword_remove_stopword_count_dic_of_dic = {}
con_rootword_remove_stopword_count_dic_of_dic = {}

for name in company_name_tuple:
    pro_rootword_remove_stopword_count_dic_of_dic[name] = parse_data_as_rootwords_remove_stopWord_count(pro_str_list_dic[name])
    con_rootword_remove_stopword_count_dic_of_dic[name] = parse_data_as_rootwords_remove_stopWord_count(con_str_list_dic[name])
In [26]:
#build data fram to result of count non stop root words
pro_non_stop_root_word_count_table_dic = {}
con_non_stop_root_word_count_table_dic = {}

for name in company_name_tuple:
    pro_non_stop_root_word_count_table_dic[name] = pd.DataFrame.from_dict({(name+'_pro_non_stop_root_word'):list(pro_rootword_remove_stopword_count_dic_of_dic[name].keys()) , 'counts': list(pro_rootword_remove_stopword_count_dic_of_dic[name].values())})
    con_non_stop_root_word_count_table_dic[name] = pd.DataFrame.from_dict({(name+'_con_non_stop_root_word'):list(con_rootword_remove_stopword_count_dic_of_dic[name].keys()) , 'counts': list(con_rootword_remove_stopword_count_dic_of_dic[name].values())})
    pro_non_stop_root_word_count_table_dic[name] = pro_non_stop_root_word_count_table_dic[name].sort_values(by='counts', ascending=False)
    con_non_stop_root_word_count_table_dic[name] = con_non_stop_root_word_count_table_dic[name].sort_values(by='counts', ascending=False)
    print(name)
    print("pro words: " + str(list(pro_non_stop_root_word_count_table_dic[name].iloc[0:10][name+'_pro_non_stop_root_word'])))
    print("con words: " + str(list(con_non_stop_root_word_count_table_dic[name].iloc[0:10][name+'_con_non_stop_root_word'])))
    pro_non_stop_root_word_count_table_dic[name].iloc[0:20].plot(kind = 'bar',x=name+'_pro_non_stop_root_word', y='counts',title=name + " _pro_non_stop_root_word cont",figsize=(20,10),fontsize=30, color = company_color_dic[name]).title.set_size(40)
    con_non_stop_root_word_count_table_dic[name].iloc[0:20].plot(kind = 'bar',x=name+'_con_non_stop_root_word', y='counts',title=name + " _con_non_stop_root_word cont",figsize=(20,10),fontsize=30, color = company_color_dic[name]).title.set_size(40)
    
overall
pro words: ['work', 'great', 'good', 'peopl', 'benefit', 'compani', 'lot', 'veri', 'opportun', 'pay']
con words: ['work', 'manag', 'get', 'compani', 'time', 'veri', 'peopl', 'hour', 'thi', 'employe']
amazon
pro words: ['work', 'good', 'great', 'peopl', 'compani', 'benefit', 'pay', 'amazon', 'lot', 'opportun']
con words: ['work', 'manag', 'get', 'time', 'hour', 'veri', 'amazon', 'peopl', 'compani', 'employe']
google
pro words: ['work', 'great', 'peopl', 'good', 'benefit', 'compani', 'perk', 'googl', 'food', 'veri']
con words: ['work', 'compani', 'manag', 'get', 'googl', 'peopl', 'veri', 'time', 'lot', 'big']
apple
pro words: ['great', 'work', 'benefit', 'peopl', 'good', 'appl', 'compani', 'product', 'pay', 'environ']
con words: ['work', 'manag', 'appl', 'time', 'get', 'retail', 'veri', 'custom', 'compani', 'hour']
facebook
pro words: ['work', 'peopl', 'great', 'compani', 'cultur', 'facebook', 'benefit', 'perk', 'veri', 'lot']
con words: ['work', 'compani', 'peopl', 'manag', 'get', 'thi', 'team', 'facebook', 'veri', 'time']
microsoft
pro words: ['work', 'great', 'good', 'benefit', 'peopl', 'compani', 'opportun', 'lot', 'veri', 'microsoft']
con words: ['manag', 'work', 'compani', 'get', 'veri', 'peopl', 'team', 'polit', 'microsoft', 'time']
netflix
pro words: ['work', 'great', 'compani', 'pay', 'netflix', 'good', 'peopl', 'free', 'cultur', 'veri']
con words: ['manag', 'work', 'peopl', 'get', 'wa', 'netflix', 'time', 'thi', 'compani', 'job']
In [27]:
#start to count root phrase
pro_root_phrase_count_dic_of_dic = {}
con_root_phrase_count_dic_of_dic = {}

for name in company_name_tuple:
    pro_root_phrase_count_dic_of_dic[name] = parse_data_as_root_phrase_count(pro_str_list_dic[name])
    con_root_phrase_count_dic_of_dic[name] = parse_data_as_root_phrase_count(con_str_list_dic[name])
In [28]:
#build data fram to result of count root phrase
pro_root_phrase_count_table_dic = {}
con_root_phrase_count_table_dic = {}

for name in company_name_tuple:
    pro_root_phrase_count_table_dic[name] = pd.DataFrame.from_dict({(name+'_pro_root_phrase'):list(pro_root_phrase_count_dic_of_dic[name].keys()) , 'counts': list(pro_root_phrase_count_dic_of_dic[name].values())})
    con_root_phrase_count_table_dic[name] = pd.DataFrame.from_dict({(name+'_con_root_phrase'):list(con_root_phrase_count_dic_of_dic[name].keys()) , 'counts': list(con_root_phrase_count_dic_of_dic[name].values())})
    pro_root_phrase_count_table_dic[name] = pro_root_phrase_count_table_dic[name].sort_values(by='counts', ascending=False)
    con_root_phrase_count_table_dic[name] = con_root_phrase_count_table_dic[name].sort_values(by='counts', ascending=False)
    print(name)
    print("pro phrase: " + str(list(pro_root_phrase_count_table_dic[name].iloc[0:5][name+'_pro_root_phrase'])))
    print("con phrase: " + str(list(con_root_phrase_count_table_dic[name].iloc[0:5][name+'_con_root_phrase'])))
    pro_root_phrase_count_table_dic[name].iloc[0:20].plot(kind = 'bar',x=name+'_pro_root_phrase', y='counts',title=name + " _pro_root_phrase cont",figsize=(20,10),fontsize=30, color = company_color_dic[name]).title.set_size(40)
    con_root_phrase_count_table_dic[name].iloc[0:20].plot(kind = 'bar',x=name+'_con_root_phrase', y='counts',title=name + " _con_root_phrase cont",figsize=(20,10),fontsize=30, color = company_color_dic[name]).title.set_size(40)
    
overall
pro phrase: ['great benefit', 'smart peopl', 'good benefit', 'good pay', 'great place']
con phrase: ['big compani', 'hard work', 'upper manag', 'worklif balanc', 'other compani']
amazon
pro phrase: ['smart peopl', 'good pay', 'great benefit', 'good benefit', 'good work']
con phrase: ['hard work', 'short break', 'fast pace', 'worklif balanc', 'high turnov']
google
pro phrase: ['free food', 'smart peopl', 'great benefit', 'great peopl', 'great perk']
con phrase: ['big compani', 'smart peopl', 'larg compani', 'other compani', 'hard work']
apple
pro phrase: ['great benefit', 'good benefit', 'great peopl', 'great compani', 'good pay']
con phrase: ['retail hour', 'full time', 'retail store', 'retail job', 'retail environ']
facebook
pro phrase: ['smart peopl', 'free food', 'great benefit', 'open cultur', 'great perk']
con phrase: ['worklif balanc', 'big compani', 'other compani', 'ani con', 'fast pace']
microsoft
pro phrase: ['smart peopl', 'great benefit', 'good benefit', 'good work', 'great place']
con phrase: ['big compani', 'upper manag', 'bad manag', 'red tape', 'other compani']
netflix
pro phrase: ['free netflix', 'good pay', 'free food', 'great benefit', 'smart peopl']
con phrase: ['high perform', 'upper manag', 'high turnov', 'few month', 'other compani']
In [29]:
#start to count phrase
pro_phrase_count_dic_of_dic = {}
con_phrase_count_dic_of_dic = {}

for name in company_name_tuple:
    pro_phrase_count_dic_of_dic[name] = parse_data_as_phrase_count(pro_str_list_dic[name])
    con_phrase_count_dic_of_dic[name] = parse_data_as_phrase_count(con_str_list_dic[name])
In [30]:
#build data fram to result of count root phrase
pro_phrase_count_table_dic = {}
con_phrase_count_table_dic = {}

for name in company_name_tuple:
    pro_phrase_count_table_dic[name] = pd.DataFrame.from_dict({(name+'_pro_phrase'):list(pro_phrase_count_dic_of_dic[name].keys()) , 'counts': list(pro_phrase_count_dic_of_dic[name].values())})
    con_phrase_count_table_dic[name] = pd.DataFrame.from_dict({(name+'_con_phrase'):list(con_phrase_count_dic_of_dic[name].keys()) , 'counts': list(con_phrase_count_dic_of_dic[name].values())})
    pro_phrase_count_table_dic[name] = pro_phrase_count_table_dic[name].sort_values(by='counts', ascending=False)
    con_phrase_count_table_dic[name] = con_phrase_count_table_dic[name].sort_values(by='counts', ascending=False)
    print(name)
    print("pro phrase: " + str(list(pro_phrase_count_table_dic[name].iloc[0:7][name+'_pro_phrase'])))
    print("con phrase: " + str(list(con_phrase_count_table_dic[name].iloc[0:7][name+'_con_phrase'])))
    pro_phrase_count_table_dic[name].iloc[0:20].plot(kind = 'bar',x=name+'_pro_phrase', y='counts',title=name + " _pro_phrase cont",figsize=(20,10),fontsize=30, color = company_color_dic[name]).title.set_size(40)
    con_phrase_count_table_dic[name].iloc[0:20].plot(kind = 'bar',x=name+'_con_phrase', y='counts',title=name + " _con_phrase cont",figsize=(20,10),fontsize=30, color = company_color_dic[name]).title.set_size(40)
    
overall
pro phrase: ['good pay', 'great place', 'great company', 'good work', 'great work', 'great pay', 'free food']
con phrase: ['worklife balance', 'big company', 'upper management', 'hard work', 'large company', 'middle management', 'senior management']
amazon
pro phrase: ['good pay', 'great place', 'good work', 'great pay', 'decent pay', 'great company', 'good place']
con phrase: ['worklife balance', 'hard work', 'high turnover', 'upper management', 'low pay', 'senior management', 'mandatory overtime']
google
pro phrase: ['free food', 'great place', 'great company', 'great culture', 'great work', 'good work', 'good pay']
con phrase: ['big company', 'large company', 'middle management', 'worklife balance', 'red tape', 'long time', 'hard work']
apple
pro phrase: ['great company', 'good pay', 'great pay', 'great place', 'great culture', 'great team', 'great work']
con phrase: ['worklife balance', 'full time', 'retail job', 'upper management', 'retail store', 'low pay', 'retail environment']
facebook
pro phrase: ['free food', 'open culture', 'great culture', 'great place', 'great company', 'great food', 'great work']
con phrase: ['worklife balance', 'big company', 'free food', 'cant think', 'upper management', 'hard work', 'little bit']
microsoft
pro phrase: ['good work', 'great place', 'great company', 'good pay', 'great work', 'worklife balance', 'flexible work']
con phrase: ['big company', 'large company', 'middle management', 'worklife balance', 'upper management', 'senior management', 'red tape']
netflix
pro phrase: ['free netflix', 'good pay', 'free food', 'great pay', 'great culture', 'free lunch', 'free coffee']
con phrase: ['high performance', 'upper management', 'high turnover', '’ t', 'high pressure', 'full time', 'good thing']
In [ ]: